install.packages("sf")
install.packages("sf")
remove.packages("sf")
install.packages("sf")
remove.packages("sf")
install.packages("sf")
install.packages("sf")
install.packages("sf")
install.packages("sf")
install.packages("sf")
install.packages("sf")
renv::activate()
renv::deactivate()
renv::activate()
renv::status()
install.packages("sf")
library(sf)
sf_use_s2(FALSE)
africa_grid_covariates <- st_read(paste0(global_data_dir,"clean/gridded_covariates_snl.gpkg"))
africa_grid_covariates <- st_read(paste0("Data/ML Prediction/Predictors/gridded_covariates_snl.gpkg"))
agg_png("./Figures/Figure-A-1-Common-Support.png",
width = 14, height = 14, res = 700, units = "in")
library(ragg)
library(tidyverse)
library(ragg)
install.packages("tidyverse")
agg_png("./Figures/Figure-A-1-Common-Support.png",
width = 14, height = 14, res = 700, units = "in")
library(tidyverse)
library(ragg)
agg_png("./Figures/Figure-A-1-Common-Support.png",
width = 14, height = 14, res = 700, units = "in")
africa_grid_covariates %>%
group_by(gid) %>%
mutate(missing_ERA = is.na(ERA_Arc),
missing_glg = is.na(GLG_K),
missing_sub = is.na(SUBauid),
missing_mag = is.na(mag)) %>%
ungroup() %>%
select(gid, starts_with("missing")) %>%
pivot_longer( cols = starts_with("missing"), values_to = "Missingness") %>%
mutate(name = case_when(name == "missing_ERA" ~ "Missing Age of the Bedrock",
name == "missing_glg" ~ "Missing Geological Type",
name == "missing_mag" ~ "Missing Electromagnatism",
name == "missing_sub" ~ "Missing Geological Subtype")) %>%
{ggplot() +
geom_sf(aes(fill = Missingness), size = .01, data = .) +
facet_wrap(~name) +
scale_fill_brewer(palette = "Set1", direction = -1) +
theme(text = element_text(size=20))+
labs(title = "Missingness of Geological Predictors")}
dev.off()
africa_grid_covariates <- st_read(paste0("Data/ML Prediction/Predictors/gridded_covariates_snl.gpkg"))
agg_png("./Figures/Figure-A-1-Common-Support.png",
width = 14, height = 14, res = 700, units = "in")
africa_grid_covariates %>%
group_by(gid) %>%
mutate(missing_ERA = is.na(ERA_Arc),
missing_glg = is.na(GLG_K),
missing_sub = is.na(SUBauid),
missing_mag = is.na(mag)) %>%
ungroup() %>%
select(gid, starts_with("missing")) %>%
pivot_longer( cols = starts_with("missing"), values_to = "Missingness") %>%
mutate(name = case_when(name == "missing_ERA" ~ "Missing Age of the Bedrock",
name == "missing_glg" ~ "Missing Geological Type",
name == "missing_mag" ~ "Missing Electromagnatism",
name == "missing_sub" ~ "Missing Geological Subtype")) %>%
{ggplot() +
geom_sf(aes(fill = Missingness), size = .01, data = .) +
facet_wrap(~name) +
scale_fill_brewer(palette = "Set1", direction = -1) +
theme(text = element_text(size=20))+
labs(title = "Missingness of Geological Predictors")}
dev.off()
agg_png("./Figures/Figure-A-1-Common-Support.png",
width = 14, height = 14, res = 700, units = "in")
africa_grid_covariates %>%
group_by(gid) %>%
mutate(missing_ERA = is.na(ERA_Arc),
missing_glg = is.na(GLG_K),
missing_sub = is.na(SUBauid),
missing_mag = is.na(mag)) %>%
ungroup() %>%
select(gid, starts_with("missing")) %>%
pivot_longer( cols = starts_with("missing"), values_to = "Missingness") %>%
mutate(name = case_when(name == "missing_ERA" ~ "Missing Age of the Bedrock",
name == "missing_glg" ~ "Missing Geological Type",
name == "missing_mag" ~ "Missing Electromagnatism",
name == "missing_sub" ~ "Missing Geological Subtype")) %>%
{ggplot() +
geom_sf(aes(fill = Missingness), color = NA, size = .01, data = .) +
facet_wrap(~name) +
scale_fill_brewer(palette = "Set1", direction = -1) +
theme(text = element_text(size=20))+
labs(title = "Missingness of Geological Predictors")}
dev.off()
asm_sites_cod <- st_read("Data/IPIS/cod_mines_curated_all_opendata_p_ipis.shp")
# this code was written before the release of s2
sf_use_s2(FALSE)
asm_sites_cod <- st_read("Data/IPIS/cod_mines_curated_all_opendata_p_ipis.shp")
world <- st_read("Data/GADM/gadm36_levels.gpkg", "level1") %>%
st_transform(st_crs(asm_sites_cod)) #it's already in 4326 but slight differences in WKT causes error
world <- st_read("Data/GADM/gadm36_levels.gpkg", "level1") %>%
st_transform(st_crs(asm_sites_cod)) #it's already in 4326 but slight differences in WKT causes error
congo <- world %>% filter(NAME_0 == "Democratic Republic of the Congo")
congo_provinces_of_interest <- c("Ituri",
"Sud-Kivu",
"Nord-Kivu",
"Haut-Katanga",
"Maniema",
"Tanganyika",
"Haut-Lomami",
"Tshopo")
congo_provinces_of_interest <- congo %>%
dplyr::filter(NAME_1 %in% congo_provinces_of_interest)
mining_permits_cod <- st_read("Data/Mining Permits/Democratic_Republic_of_the_Congo_mining_permits.shp")
mining_permits_cod_filtered <-
mining_permits_cod %>%
filter(grepl("Actif", statut)) %>%
filter(grepl("Au|Ta|Co|Cu|Diamant|Sn|W", resource))
agg_png("./Figures/Figure-A-2-IPIS-and-mining-permits.png",
width = 14, height = 14, res = 700, units = "in")
ggplot() +
geom_sf(data = congo) +
geom_sf(aes(fill = "Training Area"), data = congo_provinces_of_interest)+
geom_sf(aes(color = "Mining Concession"),data = mining_permits_cod_filtered, fill = NA) +
geom_sf(aes(shape = "ASM"), data = asm_sites_cod, color = "orange", alpha = .2) +
labs(title = "Congo ASM points and Mining Concessions", color = "", fill = "", shape = "") +
scale_colour_manual(values = c("darkred")) +
scale_fill_manual(values = c("grey20"))
dev.off()
pred_ASM_robust_files <- list.files("Data/ML Prediction/Predictions",
"prediction_model_ASM_gold_\\d+_individual_hp\\.dill\\.csv")
pred_ASM_robust_seeds <- pred_ASM_robust_files %>%
str_split("_|\\.") %>%
sapply(`[`, 5) %>%
as.numeric()
voted_asm <-
lapply(paste0(global_data_dir_pred, "predictions/", pred_ASM_robust_files), read_csv) %>%
bind_rows()
voted_asm <-
lapply(paste0("Data/ML Prediction/Predictions", pred_ASM_robust_files), read_csv) %>%
bind_rows()
voted_asm <-
lapply(paste0("Data/ML Prediction/Predictions/", pred_ASM_robust_files), read_csv) %>%
bind_rows()
agg_png("./Figures/Figure-A-6-Predicted-vs-Actual.png",
width = 7, height = 4, units = "in", res = 300)
voted_asm  %>%
group_by(gid) %>%
summarise(ASM_prob = mean(ASM_prob)) %>%
ungroup() %>%
mutate(ASM_actual = gid %in% grids_asm) %>%
ggplot +
geom_boxplot(aes(ASM_actual, ASM_prob), alpha = .05) +
labs(title = "ASM Probabilities of Predicted vs Actual ASM", y = "Predicted Probability of ASM", x = "Actual ASM") +
coord_flip()
grids <- st_read("Data/ML Prediction/Predictors/gridded_covariates_snl.gpkg")
grids_asm <-
grids %>%
{c(grids$gid[sapply(st_intersects(., asm_sites_cod %>% filter(is_gold_mi == 1)), function(x){length(x)> 0})],
grids$gid[sapply(st_intersects(., asm_sites_tza %>% filter(grepl("Gold",mineral1na))), function(x){length(x)> 0})],
grids$gid[sapply(st_intersects(., asm_sites_burkina), function(x){length(x)> 0})])}
# this section is needed to subset to the GIDs within the predictive region
asm_sites_cod <- st_read("Data/IPIS/cod_mines_curated_all_opendata_p_ipis.shp")
asm_sites_tza <- st_read("Data/IPIS/tza_mines_curated_all_opendata_p_ipis.shp")
asm_sites_burkina <- st_read("Data/Burkina ASM/Sites_Potentiels.shp")
grids <- st_read("Data/ML Prediction/Predictors/gridded_covariates_snl.gpkg")
grids_asm <-
grids %>%
{c(grids$gid[sapply(st_intersects(., asm_sites_cod %>% filter(is_gold_mi == 1)), function(x){length(x)> 0})],
grids$gid[sapply(st_intersects(., asm_sites_tza %>% filter(grepl("Gold",mineral1na))), function(x){length(x)> 0})],
grids$gid[sapply(st_intersects(., asm_sites_burkina), function(x){length(x)> 0})])}
agg_png("./Figures/Figure-A-6-Predicted-vs-Actual.png",
width = 7, height = 4, units = "in", res = 300)
voted_asm  %>%
group_by(gid) %>%
summarise(ASM_prob = mean(ASM_prob)) %>%
ungroup() %>%
mutate(ASM_actual = gid %in% grids_asm) %>%
ggplot +
geom_boxplot(aes(ASM_actual, ASM_prob), alpha = .05) +
labs(title = "ASM Probabilities of Predicted vs Actual ASM", y = "Predicted Probability of ASM", x = "Actual ASM") +
coord_flip()
dev.off()
grids <- st_read("Data/ML Prediction/Predictors/snl_gridded_collapsed.gpkg")
grids_asm <-
grids %>%
{c(grids$gid[sapply(st_intersects(., asm_sites_cod %>% filter(is_gold_mi == 1)), function(x){length(x)> 0})],
grids$gid[sapply(st_intersects(., asm_sites_tza %>% filter(grepl("Gold",mineral1na))), function(x){length(x)> 0})],
grids$gid[sapply(st_intersects(., asm_sites_burkina), function(x){length(x)> 0})])}
agg_png("./Figures/Figure-A-6-Predicted-vs-Actual.png",
width = 7, height = 4, units = "in", res = 300)
voted_asm  %>%
group_by(gid) %>%
summarise(ASM_prob = mean(ASM_prob)) %>%
ungroup() %>%
mutate(ASM_actual = gid %in% grids_asm) %>%
ggplot +
geom_boxplot(aes(ASM_actual, ASM_prob), alpha = .05) +
labs(title = "ASM Probabilities of Predicted vs Actual ASM", y = "Predicted Probability of ASM", x = "Actual ASM") +
coord_flip()
dev.off()
grids <- st_read("Data/ML Prediction/Predictors/gridded_covariates_snl.gpkg")
grids_asm <-
grids %>%
{c(grids$gid[sapply(st_intersects(., asm_sites_cod %>% filter(is_gold_mi == 1)), function(x){length(x)> 0})],
grids$gid[sapply(st_intersects(., asm_sites_tza %>% filter(grepl("Gold",mineral1na))), function(x){length(x)> 0})],
grids$gid[sapply(st_intersects(., asm_sites_burkina), function(x){length(x)> 0})])}
agg_png("./Figures/Figure-A-6-Predicted-vs-Actual.png",
width = 7, height = 4, units = "in", res = 300)
voted_asm  %>%
group_by(gid) %>%
summarise(ASM_prob = mean(ASM_prob)) %>%
ungroup() %>%
mutate(ASM_actual = gid %in% grids_asm) %>%
ggplot +
geom_boxplot(aes(ASM_actual, ASM_prob), alpha = .05) +
labs(title = "ASM Probabilities of Predicted vs Actual ASM", y = "Predicted Probability of ASM", x = "Actual ASM") +
coord_flip()
dev.off()
######
# ML Predictions
######
pred_ASM_robust_files <- list.files("Data/ML Prediction/Predictions",
"prediction_model_ASM_\\d+_individual_hp\\.dill\\.csv")
pred_ASM_robust_seeds <- pred_ASM_robust_files %>%
str_split("_|\\.") %>%
sapply(`[`, 5) %>%
as.numeric()
voted_asm <-
lapply(paste0("Data/ML Prediction/Predictions/", pred_ASM_robust_files), read_csv) %>%
bind_rows()
agg_png("./Figures/Figure-A-6-Predicted-vs-Actual.png",
width = 7, height = 4, units = "in", res = 300)
voted_asm  %>%
group_by(gid) %>%
summarise(ASM_prob = mean(ASM_prob)) %>%
ungroup() %>%
mutate(ASM_actual = gid %in% grids_asm) %>%
ggplot +
geom_boxplot(aes(ASM_actual, ASM_prob), alpha = .05) +
labs(title = "ASM Probabilities of Predicted vs Actual ASM", y = "Predicted Probability of ASM", x = "Actual ASM") +
coord_flip()
dev.off()
library(tidyverse)
library(ragg)
lat_robust_csv <- read_csv("./Data/ML Prediction/Scores/lat_split.csv")
agg_png("./Figures/lat_robust.png",
width = 7, height = 4,
units = "in", res = 300)
lat_robust_csv %>%
ggplot() +
geom_point(aes(sample_included, scores)) +
ylim(0,1) +
labs(title = "Out of Sample Geographic Accuracy",
subtitle = "Latitude-split geographic cross validation",
x = "Size of CV fold sample",
y = "Accuracy")
dev.off()
agg_png("./Figures/Figure-A-7-Out-of-Sample-Lat.png",
width = 7, height = 4,
units = "in", res = 300)
lat_robust_csv %>%
ggplot() +
geom_point(aes(sample_included, scores)) +
ylim(0,1) +
labs(title = "Out of Sample Geographic Accuracy",
subtitle = "Latitude-split geographic cross validation",
x = "Size of CV fold sample",
y = "Accuracy")
dev.off()
lat_robust_csv <- read_csv("./Data/ML Prediction/Scores/lon_split.csv")
agg_png("./Figures/Figure-A-8-Out-of-Sample-Lon.png",
width = 7, height = 4,
units = "in", res = 300)
lat_robust_csv %>%
ggplot() +
geom_point(aes(sample_included, scores)) +
ylim(0,1) +
labs(title = "Out of Sample Geographic Accuracy",
subtitle = "Latitude-split geographic cross validation",
x = "Size of CV fold sample",
y = "Accuracy")
dev.off()
library(sf)
asm_vote_share_grids <-
grids %>%
select(gid) %>%
left_join(voted_asm %>%
group_by(gid) %>%
summarise("Mean of Predicted Probability" = mean(ASM_prob),
"SD of Predicted Probability" = sd(ASM_prob)) %>%
ungroup() %>%
pivot_longer(-c(gid), names_to = "variable", values_to = "value"))
agg_png("./Figures/Figure-A-10-pedicted-probability-map.png", width = 7, height = 4, units = "in", res = 300)
asm_vote_share_grids %>%
filter(!is.na(value)) %>%
ggplot()+
geom_sf(aes(fill = value), size = 0) +
facet_grid(cols = vars(variable)) +
scale_fill_viridis_c(option = "D", begin = .5, end = 0) +
labs(title = "Predicted Probability of ASM")
dev.off()
agg_png("./Figures/Figure-A-10-pedicted-probability-map.png", width = 7, height = 4, units = "in", res = 300)
asm_vote_share_grids %>%
filter(!is.na(value)) %>%
ggplot()+
geom_sf(aes(fill = value), size = 0, color = NA) +
facet_grid(cols = vars(variable)) +
scale_fill_viridis_c(option = "D", begin = .5, end = 0) +
labs(title = "Predicted Probability of ASM")
dev.off()
library(tidyverse)
library(ragg)
library(sf)
# this code was written before the release of sf
sf_use_s2(FALSE)
######
# ML Predictions
######
pred_ASM_robust_files <- list.files("Data/ML Prediction/Predictions",
"prediction_model_ASM_\\d+_individual_hp\\.dill\\.csv")
pred_ASM_robust_seeds <- pred_ASM_robust_files %>%
str_split("_|\\.") %>%
sapply(`[`, 5) %>%
as.numeric()
voted_asm <-
lapply(paste0("Data/ML Prediction/Predictions/", pred_ASM_robust_files), read_csv) %>%
bind_rows()
agg_png("./Figures/Figure-A-10-vote-share-across-seeds.png", width = 7, height = 4, units = "in", res = 300)
voted_asm %>%
group_by(gid) %>%
summarise("ASM Share Agreement" = ifelse(sum(ASM) > (n()/2), sum(ASM) /n(), sum(!ASM)/n()),
"ASM Prediction" = sum(ASM) > (n()/2)) %>%
ungroup() %>%
ggplot() +
geom_histogram(aes(`ASM Share Agreement`, fill = `ASM Prediction` ), bins = 10) +
labs(title = "Agreement of ASM Prediction for Different Seeds")
dev.off()
renv::snapshot()
source("~/Dropbox (ESOC - Princeton)/Extractives/Research/ETL/Replication - R/Code/main.R")
source("~/Dropbox (ESOC - Princeton)/Extractives/Research/ETL/Replication - R/Code/main.R")
source("~/Dropbox (ESOC - Princeton)/Extractives/Research/ETL/Replication - R/Code/main.R")
source("Code/figure-1.R")
source("~/Dropbox (ESOC - Princeton)/Extractives/Research/ETL/Replication - R/Code/main.R")
Version()
version()
sessioninfo()
